# -*- coding: utf-8 -*-
'''
Created on Apr 6, 2013

@author: LONG HOANG GIANG
'''

import sys, os
from urlparse import urljoin
sys.path.append(os.path.join(os.path.dirname(__file__), '../'))
import pyLib
import logging

# Configure the root logger once at import time: DEBUG and above,
# with a day/month/year timestamp in front of every record.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%d/%m/%Y %H:%M:%S'
)

def getListStory(url):
    '''
    Load ``url`` through pyLib and build a parse tree from the response.

    NOTE(review): the tree is not used or returned, so this function
    currently always returns None -- it looks unfinished. Presumably it is
    meant to extract the stories listed on the page; confirm with the author.
    '''
    page = pyLib.loadWeb(url)
    # TODO: the parsed tree is discarded; extraction logic is still missing.
    story_tree = page.build_tree()
    

def process():
    
    url = 'http://www.truyenviet.com/truyen-nguoi-lon'
    tree = pyLib.loadWeb(url).build_tree()
    
    for node in tree.xpath("//div[@class='contentdescription']/following-sibling::ul[1]/li/a"):
        name = pyLib.stringify(node)
        href = node.get('href', '').strip()
        if href == '' or 'Đồng Tình Luyến Ái' in name: continue
        href = urljoin('http://truyenviet.com', href)
        print name, href



if __name__ == '__main__':
    # Script entry point: run the crawl step and terminate the process.
    logging.info("Start crawl truyenviet")

    # Alternative entry point, currently disabled:
#    process()
    getListStory('http://truyenviet.com/truyen-nguoi-lon/5-0-9')

    logging.info("Finished")

    # os._exit() terminates immediately without flushing stdio buffers or
    # running cleanup handlers, so flush everything explicitly first.
    logging.shutdown()
    sys.stdout.flush()
    sys.stderr.flush()
    # Reaching this point means the run succeeded, so report status 0
    # (the previous status 1 signalled failure to the calling shell).
    # The hard exit is kept in case it is needed to stop lingering
    # background work -- NOTE(review): confirm whether sys.exit(0) suffices.
    os._exit(0)